SGD

对权重张量执行带动量与权重衰减的随机梯度下降更新。

输入:
  • weight - 待更新权重张量首地址。

  • accumulate - 动量累积张量首地址。

  • gradient - 梯度张量首地址。

  • learning_rate - 学习率。

  • dampening - 动量阻尼系数。

  • moment - 动量系数。

  • nesterov - 是否启用 Nesterov 动量。

  • weight_decay - 权重衰减系数。

  • start - 参与计算的起始索引(包含该位置;仅适用于共享存储版本)。

  • end - 参与计算的结束索引(不包含该位置,即计算范围为 [start, end);仅适用于共享存储版本)。

  • core_mask(int) - 核掩码(仅共享存储版本具有此参数,私有存储版本无需传入)。

  • length(int) - 参与计算的数据长度(仅适用于私有存储版本)。

输出:
  • weight - 原地写回更新后的权重张量。

  • accumulate - 原地写回更新后的动量张量。

支持平台:

FT78NE MT7004

备注:

  • FT78NE 支持 fp32 数据类型。

  • MT7004 支持 fp16、fp32 数据类型。

共享存储版本:

void hp_sgd_s(half *weight, half *accumulate, const half *gradient, float learning_rate, float dampening, float moment, bool nesterov, float weight_decay, int start, int end, int core_mask)
void fp_sgd_s(float *weight, float *accumulate, const float *gradient, float learning_rate, float dampening, float moment, bool nesterov, float weight_decay, int start, int end, int core_mask)
计算公式:

\[\begin{split}\begin{aligned} g'_t &= g_t + \text{weight\_decay} \cdot w_{t-1} \\ m_t &= \text{moment} \cdot m_{t-1} + (1 - \text{dampening}) \cdot g'_t \\ u_t &= \begin{cases} m_t \cdot \text{moment} + g'_t, & \text{if nesterov = true} \\ m_t, & \text{otherwise} \end{cases} \\ w_t &= w_{t-1} - \text{learning\_rate} \cdot u_t \end{aligned}\end{split}\]

其中 w 对应 weight,m 对应 accumulate,g 对应 gradient。

C调用示例:

 1// FT78NE 多核示例
 2#include <stdio.h>
 3#include <stdbool.h>
 4
 5int main(void) {
 6    float *weight = (float *)0xA0000000;      // DDR 存储
 7    float *accumulate = (float *)0xB0000000;
 8    float *gradient = (float *)0xC0000000;
 9    int start = 0;
10    int end = 4096;
11    int core_mask = 0xff;
12    float learning_rate = 1e-2f;
13    float dampening = 0.0f;
14    float moment = 0.9f;
15    bool nesterov = true;
16    float weight_decay = 1e-2f;
17    fp_sgd_s(weight, accumulate, gradient, learning_rate,
18             dampening, moment, nesterov, weight_decay,
19             start, end, core_mask);
20    return 0;
21}

私有存储版本:

void hp_sgd_p(half *weight, half *accumulate, const half *gradient, float learning_rate, float dampening, float moment, bool nesterov, float weight_decay, int length)
void fp_sgd_p(float *weight, float *accumulate, const float *gradient, float learning_rate, float dampening, float moment, bool nesterov, float weight_decay, int length)

C调用示例:

 1// MT7004 单核示例
 2#include <stdio.h>
 3#include <stdbool.h>
 4
 5int main(void) {
 6    half *weight = (half *)0x10000000;       // L2 存储
 7    half *accumulate = (half *)0x10002000;
 8    half *gradient = (half *)0x10004000;
 9    int length = 2048;
10    float learning_rate = 5e-3f;
11    float dampening = 0.0f;
12    float moment = 0.9f;
13    bool nesterov = false;
14    float weight_decay = 5e-3f;
15    hp_sgd_p(weight, accumulate, gradient, learning_rate,
16             dampening, moment, nesterov, weight_decay,
17             length);
18    return 0;
19}